{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# High-level RNN MXNet Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import numpy as np\n",
    "import mxnet as mx\n",
    "from mxnet.io import DataDesc\n",
    "from common.params_lstm import *\n",
    "from common.utils import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Force one-gpu\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OS:  linux\n",
      "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
      "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
      "Numpy:  1.14.1\n",
      "MXNet:  0.12.0\n",
      "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
      "CUDA Version 8.0.61\n",
      "CuDNN Version  6.0.21\n"
     ]
    }
   ],
   "source": [
    "print(\"OS: \", sys.platform)\n",
    "print(\"Python: \", sys.version)\n",
    "print(\"Numpy: \", np.__version__)\n",
    "print(\"MXNet: \", mx.__version__)\n",
    "print(\"GPU: \", get_gpu_name())\n",
    "print(get_cuda_version())\n",
    "print(\"CuDNN Version \", get_cudnn_version())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_symbol(CUDNN=True,\n",
    "                  maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN, maxl=MAXLEN):\n",
    "    # https://mxnet.incubator.apache.org/api/python/rnn.html\n",
    "    data = mx.symbol.Variable('data')\n",
    "    embedded_step = mx.symbol.Embedding(data=data, input_dim=maxf, output_dim=edim)\n",
    "    \n",
    "    # Fusing RNN layers across time step into one kernel\n",
    "    # Improves speed but is less flexible\n",
    "    # Currently only supported if using cuDNN on GPU\n",
    "    if not CUDNN:\n",
    "        gru_cell = mx.rnn.GRUCell(num_hidden=nhid)\n",
    "    else:\n",
    "        gru_cell = mx.rnn.FusedRNNCell(num_hidden=nhid, num_layers=1, mode='gru')\n",
    "    \n",
    "    begin_state = gru_cell.begin_state()\n",
    "    # Call the cell to get the output of one time step for a batch.\n",
    "    # TODO: TNC layout (sequence length, batch size, and feature dimensions) is faster for RNN\n",
    "    outputs, states = gru_cell.unroll(length=maxl, inputs=embedded_step, merge_outputs=False, layout='TNC')\n",
    "    \n",
    "    fc1 = mx.symbol.FullyConnected(data=outputs[-1], num_hidden=2) \n",
    "    input_y = mx.symbol.Variable('softmax_label')  \n",
    "    m = mx.symbol.SoftmaxOutput(data=fc1, label=input_y, name=\"softmax\")\n",
    "    return m"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def init_model(m, batchs=BATCHSIZE, maxl=MAXLEN, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n",
    "    ctx = [mx.gpu(0)]\n",
    "    mod = mx.mod.Module(context=ctx, symbol=m)\n",
    "    mod.bind(data_shapes=[DataDesc(name='data', shape=(maxl, batchs), layout='TNC')],\n",
    "             label_shapes=[DataDesc(name='softmax_label', shape=(batchs,))])\n",
    "    # Glorot-uniform initializer\n",
    "    mod.init_params(initializer=mx.init.Xavier(rnd_type='uniform'))\n",
    "    mod.init_optimizer(optimizer='Adam', \n",
    "                       optimizer_params=(('learning_rate', lr),\n",
    "                                         ('beta1', b1),\n",
    "                                         ('beta2', b2),\n",
    "                                         ('epsilon', eps)))\n",
    "    return mod"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preparing train set...\n",
      "Preparing test set...\n",
      "Trimming to 30000 max-features\n",
      "Padding to length 150\n",
      "(25000, 150) (25000, 150) (25000,) (25000,)\n",
      "int32 int32 int32 int32\n",
      "CPU times: user 5.59 s, sys: 391 ms, total: 5.98 s\n",
      "Wall time: 5.98 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Data into format for library\n",
    "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)\n",
    "wrapper_db = lambda args: mx.io.DataBatch(data=[mx.nd.array(args[0])], label=[mx.nd.array(args[1])])\n",
    "\n",
    "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
    "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 43.3 ms, sys: 0 ns, total: 43.3 ms\n",
      "Wall time: 42.6 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Load symbol\n",
    "sym = create_symbol()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 901 ms, sys: 521 ms, total: 1.42 s\n",
      "Wall time: 1.43 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Initialise model\n",
    "model = init_model(sym)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 0, Training ('accuracy', 0.7873397435897436)\n",
      "Epoch 1, Training ('accuracy', 0.9302083333333333)\n",
      "Epoch 2, Training ('accuracy', 0.9705128205128205)\n",
      "CPU times: user 21 s, sys: 4.39 s, total: 25.4 s\n",
      "Wall time: 23.7 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Main training loop: 12.7s\n",
    "metric = mx.metric.create('acc')\n",
    "for j in range(EPOCHS):\n",
    "    metric.reset()\n",
    "    for batch in map(wrapper_db, yield_mb_tn(x_train, y_train, BATCHSIZE, shuffle=True)):\n",
    "        model.forward(batch) \n",
    "        model.update_metric(metric, batch.label)\n",
    "        model.backward()              \n",
    "        model.update()\n",
    "    print('Epoch %d, Training %s' % (j, metric.get()))"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
